You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ho...@apache.org on 2017/03/29 18:41:22 UTC

spark git commit: [SPARK-19955][PYSPARK] Jenkins Python Conda based test.

Repository: spark
Updated Branches:
  refs/heads/master c622a87c4 -> d6ddfdf60


[SPARK-19955][PYSPARK] Jenkins Python Conda based test.

## What changes were proposed in this pull request?

Allow Jenkins Python tests to use the installed conda to test Python 2.7 support & test pip installability.

## How was this patch tested?

Updated shell scripts, ran tests locally with installed conda, ran tests in Jenkins.

Author: Holden Karau <ho...@us.ibm.com>

Closes #17355 from holdenk/SPARK-19955-support-python-tests-with-conda.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d6ddfdf6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d6ddfdf6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d6ddfdf6

Branch: refs/heads/master
Commit: d6ddfdf60e77340256873b5acf08e85f95cf3bc2
Parents: c622a87
Author: Holden Karau <ho...@us.ibm.com>
Authored: Wed Mar 29 11:41:17 2017 -0700
Committer: Holden Karau <ho...@us.ibm.com>
Committed: Wed Mar 29 11:41:17 2017 -0700

----------------------------------------------------------------------
 dev/run-pip-tests     | 66 +++++++++++++++++++++++++++++-----------------
 dev/run-tests-jenkins |  3 ++-
 python/run-tests.py   |  6 ++---
 3 files changed, 47 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d6ddfdf6/dev/run-pip-tests
----------------------------------------------------------------------
diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index af1b1fe..d51dde1 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -35,9 +35,28 @@ function delete_virtualenv() {
 }
 trap delete_virtualenv EXIT
 
+PYTHON_EXECS=()
 # Some systems don't have pip or virtualenv - in those cases our tests won't work.
-if ! hash virtualenv 2>/dev/null; then
-  echo "Missing virtualenv skipping pip installability tests."
+if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then
+  echo "virtualenv installed - using. Note if this is a conda virtual env you may wish to set USE_CONDA"
+  # Figure out which Python execs we should test pip installation with
+  if hash python2 2>/dev/null; then
+    # We do this since we are testing with virtualenv and the default virtual env python
+    # is in /usr/bin/python
+    PYTHON_EXECS+=('python2')
+  elif hash python 2>/dev/null; then
+    # If python2 isn't installed fallback to python if available
+    PYTHON_EXECS+=('python')
+  fi
+  if hash python3 2>/dev/null; then
+    PYTHON_EXECS+=('python3')
+  fi
+elif hash conda 2>/dev/null; then
+  echo "Using conda virtual enviroments"
+  PYTHON_EXECS=('3.5')
+  USE_CONDA=1
+else
+  echo "Missing virtualenv & conda, skipping pip installability tests"
   exit 0
 fi
 if ! hash pip 2>/dev/null; then
@@ -45,22 +64,8 @@ if ! hash pip 2>/dev/null; then
   exit 0
 fi
 
-# Figure out which Python execs we should test pip installation with
-PYTHON_EXECS=()
-if hash python2 2>/dev/null; then
-  # We do this since we are testing with virtualenv and the default virtual env python
-  # is in /usr/bin/python
-  PYTHON_EXECS+=('python2')
-elif hash python 2>/dev/null; then
-  # If python2 isn't installed fallback to python if available
-  PYTHON_EXECS+=('python')
-fi
-if hash python3 2>/dev/null; then
-  PYTHON_EXECS+=('python3')
-fi
-
 # Determine which version of PySpark we are building for archive name
-PYSPARK_VERSION=$(python -c "exec(open('python/pyspark/version.py').read());print __version__")
+PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print(__version__)")
 PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
 # The pip install options we use for all the pip commands
 PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall "
@@ -75,18 +80,24 @@ for python in "${PYTHON_EXECS[@]}"; do
     echo "Using $VIRTUALENV_BASE for virtualenv"
     VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
     rm -rf "$VIRTUALENV_PATH"
-    mkdir -p "$VIRTUALENV_PATH"
-    virtualenv --python=$python "$VIRTUALENV_PATH"
-    source "$VIRTUALENV_PATH"/bin/activate
-    # Upgrade pip & friends
-    pip install --upgrade pip pypandoc wheel
-    pip install numpy # Needed so we can verify mllib imports
+    if [ -n "$USE_CONDA" ]; then
+      conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools
+      source activate "$VIRTUALENV_PATH"
+    else
+      mkdir -p "$VIRTUALENV_PATH"
+      virtualenv --python=$python "$VIRTUALENV_PATH"
+      source "$VIRTUALENV_PATH"/bin/activate
+    fi
+    # Upgrade pip & friends only for virtualenv; the conda env already gets pip and numpy from `conda create`
+    if [ ! -n "$USE_CONDA" ]; then
+      pip install --upgrade pip pypandoc wheel numpy
+    fi
 
     echo "Creating pip installable source dist"
     cd "$FWDIR"/python
     # Delete the egg info file if it exists, this can cache the setup file.
     rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
-    $python setup.py sdist
+    python setup.py sdist
 
 
     echo "Installing dist into virtual env"
@@ -112,6 +123,13 @@ for python in "${PYTHON_EXECS[@]}"; do
 
     cd "$FWDIR"
 
+    # conda / virtualenv environments need to be deactivated differently
+    if [ -n "$USE_CONDA" ]; then
+      source deactivate
+    else
+      deactivate
+    fi
+
   done
 done
 

http://git-wip-us.apache.org/repos/asf/spark/blob/d6ddfdf6/dev/run-tests-jenkins
----------------------------------------------------------------------
diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index e79accf..f41f1ac 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -22,7 +22,8 @@
 # Environment variables are populated by the code here:
 #+ https://github.com/jenkinsci/ghprb-plugin/blob/master/src/main/java/org/jenkinsci/plugins/ghprb/GhprbTrigger.java#L139
 
-FWDIR="$(cd "`dirname $0`"/..; pwd)"
+FWDIR="$( cd "$( dirname "$0" )/.." && pwd )"
 cd "$FWDIR"
 
+export PATH=/home/anaconda/bin:$PATH
 exec python -u ./dev/run-tests-jenkins.py "$@"

http://git-wip-us.apache.org/repos/asf/spark/blob/d6ddfdf6/python/run-tests.py
----------------------------------------------------------------------
diff --git a/python/run-tests.py b/python/run-tests.py
index 53a0aef..b2e5043 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -111,9 +111,9 @@ def run_individual_python_test(test_name, pyspark_python):
 
 
 def get_default_python_executables():
-    python_execs = [x for x in ["python2.6", "python3.4", "pypy"] if which(x)]
-    if "python2.6" not in python_execs:
-        LOGGER.warning("Not testing against `python2.6` because it could not be found; falling"
+    python_execs = [x for x in ["python2.7", "python3.4", "pypy"] if which(x)]
+    if "python2.7" not in python_execs:
+        LOGGER.warning("Not testing against `python2.7` because it could not be found; falling"
                        " back to `python` instead")
         python_execs.insert(0, "python")
     return python_execs


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org