You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ho...@apache.org on 2017/01/25 22:44:24 UTC

spark git commit: [SPARK-19064][PYSPARK] Fix pip installing of sub components

Repository: spark
Updated Branches:
  refs/heads/master 92afaa93a -> 965c82d8c


[SPARK-19064][PYSPARK] Fix pip installing of sub components

## What changes were proposed in this pull request?

Fix installation of mllib and ml sub components, and more eagerly clean up cache files during the test script & make-distribution.

## How was this patch tested?

Updated sanity test script to import mllib and ml sub-components.

Author: Holden Karau <ho...@us.ibm.com>

Closes #16465 from holdenk/SPARK-19064-fix-pip-install-sub-components.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/965c82d8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/965c82d8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/965c82d8

Branch: refs/heads/master
Commit: 965c82d8c4b7f2d4dfbc45ec4d47d6b6588094c3
Parents: 92afaa9
Author: Holden Karau <ho...@us.ibm.com>
Authored: Wed Jan 25 14:43:39 2017 -0800
Committer: Holden Karau <ho...@us.ibm.com>
Committed: Wed Jan 25 14:43:39 2017 -0800

----------------------------------------------------------------------
 dev/make-distribution.sh | 2 ++
 dev/pip-sanity-check.py  | 2 ++
 dev/requirements.txt     | 1 +
 dev/run-pip-tests        | 7 +++++--
 python/setup.py          | 5 +++++
 5 files changed, 15 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/make-distribution.sh
----------------------------------------------------------------------
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 47ff504..6fb25f3 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -220,6 +220,8 @@ cp -r "$SPARK_HOME/data" "$DISTDIR"
 if [ "$MAKE_PIP" == "true" ]; then
   echo "Building python distribution package"
   pushd "$SPARK_HOME/python" > /dev/null
+  # Delete the egg info file if it exists, this can cache older setup files.
+  rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
   python setup.py sdist
   popd > /dev/null
 else

http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/pip-sanity-check.py
----------------------------------------------------------------------
diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py
index 430c2ab..c491005 100644
--- a/dev/pip-sanity-check.py
+++ b/dev/pip-sanity-check.py
@@ -18,6 +18,8 @@
 from __future__ import print_function
 
 from pyspark.sql import SparkSession
+from pyspark.ml.param import Params
+from pyspark.mllib.linalg import *
 import sys
 
 if __name__ == "__main__":

http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/requirements.txt
----------------------------------------------------------------------
diff --git a/dev/requirements.txt b/dev/requirements.txt
index bf042d2..7978227 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -1,3 +1,4 @@
 jira==1.0.3
 PyGithub==1.26.0
 Unidecode==0.04.19
+pypandoc==1.3.3

http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/run-pip-tests
----------------------------------------------------------------------
diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index e1da18e..af1b1fe 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -78,11 +78,14 @@ for python in "${PYTHON_EXECS[@]}"; do
     mkdir -p "$VIRTUALENV_PATH"
     virtualenv --python=$python "$VIRTUALENV_PATH"
     source "$VIRTUALENV_PATH"/bin/activate
-    # Upgrade pip
-    pip install --upgrade pip
+    # Upgrade pip & friends
+    pip install --upgrade pip pypandoc wheel
+    pip install numpy # Needed so we can verify mllib imports
 
     echo "Creating pip installable source dist"
     cd "$FWDIR"/python
+    # Delete the egg info file if it exists, this can cache the setup file.
+    rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
     $python setup.py sdist
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index bc2eb4c..47eab98 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -162,7 +162,12 @@ try:
         url='https://github.com/apache/spark/tree/master/python',
         packages=['pyspark',
                   'pyspark.mllib',
+                  'pyspark.mllib.linalg',
+                  'pyspark.mllib.stat',
                   'pyspark.ml',
+                  'pyspark.ml.linalg',
+                  'pyspark.ml.param',
+                  'pyspark.ml.stat',
                   'pyspark.sql',
                   'pyspark.streaming',
                   'pyspark.bin',


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org