You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ho...@apache.org on 2017/01/25 22:44:24 UTC
spark git commit: [SPARK-19064][PYSPARK] Fix pip installing of sub
components
Repository: spark
Updated Branches:
refs/heads/master 92afaa93a -> 965c82d8c
[SPARK-19064][PYSPARK] Fix pip installing of sub components
## What changes were proposed in this pull request?
Fix installation of the mllib and ml sub-components, and more eagerly clean up cache files in the test script and make-distribution.
## How was this patch tested?
Updated sanity test script to import mllib and ml sub-components.
Author: Holden Karau <ho...@us.ibm.com>
Closes #16465 from holdenk/SPARK-19064-fix-pip-install-sub-components.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/965c82d8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/965c82d8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/965c82d8
Branch: refs/heads/master
Commit: 965c82d8c4b7f2d4dfbc45ec4d47d6b6588094c3
Parents: 92afaa9
Author: Holden Karau <ho...@us.ibm.com>
Authored: Wed Jan 25 14:43:39 2017 -0800
Committer: Holden Karau <ho...@us.ibm.com>
Committed: Wed Jan 25 14:43:39 2017 -0800
----------------------------------------------------------------------
dev/make-distribution.sh | 2 ++
dev/pip-sanity-check.py | 2 ++
dev/requirements.txt | 1 +
dev/run-pip-tests | 7 +++++--
python/setup.py | 5 +++++
5 files changed, 15 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/make-distribution.sh
----------------------------------------------------------------------
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index 47ff504..6fb25f3 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -220,6 +220,8 @@ cp -r "$SPARK_HOME/data" "$DISTDIR"
if [ "$MAKE_PIP" == "true" ]; then
echo "Building python distribution package"
pushd "$SPARK_HOME/python" > /dev/null
+ # Delete the egg info file if it exists, this can cache older setup files.
+ rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
python setup.py sdist
popd > /dev/null
else
http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/pip-sanity-check.py
----------------------------------------------------------------------
diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py
index 430c2ab..c491005 100644
--- a/dev/pip-sanity-check.py
+++ b/dev/pip-sanity-check.py
@@ -18,6 +18,8 @@
from __future__ import print_function
from pyspark.sql import SparkSession
+from pyspark.ml.param import Params
+from pyspark.mllib.linalg import *
import sys
if __name__ == "__main__":
http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/requirements.txt
----------------------------------------------------------------------
diff --git a/dev/requirements.txt b/dev/requirements.txt
index bf042d2..7978227 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -1,3 +1,4 @@
jira==1.0.3
PyGithub==1.26.0
Unidecode==0.04.19
+pypandoc==1.3.3
http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/run-pip-tests
----------------------------------------------------------------------
diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index e1da18e..af1b1fe 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -78,11 +78,14 @@ for python in "${PYTHON_EXECS[@]}"; do
mkdir -p "$VIRTUALENV_PATH"
virtualenv --python=$python "$VIRTUALENV_PATH"
source "$VIRTUALENV_PATH"/bin/activate
- # Upgrade pip
- pip install --upgrade pip
+ # Upgrade pip & friends
+ pip install --upgrade pip pypandoc wheel
+ pip install numpy # Needed so we can verify mllib imports
echo "Creating pip installable source dist"
cd "$FWDIR"/python
+ # Delete the egg info file if it exists, this can cache the setup file.
+ rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
$python setup.py sdist
http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index bc2eb4c..47eab98 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -162,7 +162,12 @@ try:
url='https://github.com/apache/spark/tree/master/python',
packages=['pyspark',
'pyspark.mllib',
+ 'pyspark.mllib.linalg',
+ 'pyspark.mllib.stat',
'pyspark.ml',
+ 'pyspark.ml.linalg',
+ 'pyspark.ml.param',
+ 'pyspark.ml.stat',
'pyspark.sql',
'pyspark.streaming',
'pyspark.bin',
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org