You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2016/02/20 10:07:24 UTC
spark git commit: [SPARK-13302][PYSPARK][TESTS] Move the temp file
creation and cleanup outside of the doctests
Repository: spark
Updated Branches:
refs/heads/master dfb2ae2f1 -> 9ca79c1ec
[SPARK-13302][PYSPARK][TESTS] Move the temp file creation and cleanup outside of the doctests
Some of the new doctests in ml/clustering.py have a lot of setup code; this change moves the setup code to the general test init so the doctests stay more example-like.
In part this is a follow up to https://github.com/apache/spark/pull/10999
Note that the same pattern is followed in regression and recommendation — might as well clean up all three at the same time.
Author: Holden Karau <ho...@us.ibm.com>
Closes #11197 from holdenk/SPARK-13302-cleanup-doctests-in-ml-clustering.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ca79c1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ca79c1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ca79c1e
Branch: refs/heads/master
Commit: 9ca79c1ece5ad139719e4eea9f7d1b59aed01b20
Parents: dfb2ae2
Author: Holden Karau <ho...@us.ibm.com>
Authored: Sat Feb 20 09:07:19 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Sat Feb 20 09:07:19 2016 +0000
----------------------------------------------------------------------
python/pyspark/ml/clustering.py | 25 ++++++++++++++-----------
python/pyspark/ml/recommendation.py | 25 ++++++++++++++-----------
python/pyspark/ml/regression.py | 25 ++++++++++++++-----------
3 files changed, 42 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/9ca79c1e/python/pyspark/ml/clustering.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 91278d5..611b919 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -70,25 +70,18 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
True
>>> rows[2].prediction == rows[3].prediction
True
- >>> import os, tempfile
- >>> path = tempfile.mkdtemp()
- >>> kmeans_path = path + "/kmeans"
+ >>> kmeans_path = temp_path + "/kmeans"
>>> kmeans.save(kmeans_path)
>>> kmeans2 = KMeans.load(kmeans_path)
>>> kmeans2.getK()
2
- >>> model_path = path + "/kmeans_model"
+ >>> model_path = temp_path + "/kmeans_model"
>>> model.save(model_path)
>>> model2 = KMeansModel.load(model_path)
>>> model.clusterCenters()[0] == model2.clusterCenters()[0]
array([ True, True], dtype=bool)
>>> model.clusterCenters()[1] == model2.clusterCenters()[1]
array([ True, True], dtype=bool)
- >>> from shutil import rmtree
- >>> try:
- ... rmtree(path)
- ... except OSError:
- ... pass
.. versionadded:: 1.5.0
"""
@@ -310,7 +303,17 @@ if __name__ == "__main__":
sqlContext = SQLContext(sc)
globs['sc'] = sc
globs['sqlContext'] = sqlContext
- (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
- sc.stop()
+ import tempfile
+ temp_path = tempfile.mkdtemp()
+ globs['temp_path'] = temp_path
+ try:
+ (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+ sc.stop()
+ finally:
+ from shutil import rmtree
+ try:
+ rmtree(temp_path)
+ except OSError:
+ pass
if failure_count:
exit(-1)
http://git-wip-us.apache.org/repos/asf/spark/blob/9ca79c1e/python/pyspark/ml/recommendation.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index ef94488..2b605e5 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -82,14 +82,12 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha
Row(user=1, item=0, prediction=2.6258413791656494)
>>> predictions[2]
Row(user=2, item=0, prediction=-1.5018409490585327)
- >>> import os, tempfile
- >>> path = tempfile.mkdtemp()
- >>> als_path = path + "/als"
+ >>> als_path = temp_path + "/als"
>>> als.save(als_path)
>>> als2 = ALS.load(als_path)
>>> als.getMaxIter()
5
- >>> model_path = path + "/als_model"
+ >>> model_path = temp_path + "/als_model"
>>> model.save(model_path)
>>> model2 = ALSModel.load(model_path)
>>> model.rank == model2.rank
@@ -98,11 +96,6 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha
True
>>> sorted(model.itemFactors.collect()) == sorted(model2.itemFactors.collect())
True
- >>> from shutil import rmtree
- >>> try:
- ... rmtree(path)
- ... except OSError:
- ... pass
.. versionadded:: 1.4.0
"""
@@ -340,7 +333,17 @@ if __name__ == "__main__":
sqlContext = SQLContext(sc)
globs['sc'] = sc
globs['sqlContext'] = sqlContext
- (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
- sc.stop()
+ import tempfile
+ temp_path = tempfile.mkdtemp()
+ globs['temp_path'] = temp_path
+ try:
+ (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+ sc.stop()
+ finally:
+ from shutil import rmtree
+ try:
+ rmtree(temp_path)
+ except OSError:
+ pass
if failure_count:
exit(-1)
http://git-wip-us.apache.org/repos/asf/spark/blob/9ca79c1e/python/pyspark/ml/regression.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 20dc6c2..de4a751 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -68,25 +68,18 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
Traceback (most recent call last):
...
TypeError: Method setParams forces keyword arguments.
- >>> import os, tempfile
- >>> path = tempfile.mkdtemp()
- >>> lr_path = path + "/lr"
+ >>> lr_path = temp_path + "/lr"
>>> lr.save(lr_path)
>>> lr2 = LinearRegression.load(lr_path)
>>> lr2.getMaxIter()
5
- >>> model_path = path + "/lr_model"
+ >>> model_path = temp_path + "/lr_model"
>>> model.save(model_path)
>>> model2 = LinearRegressionModel.load(model_path)
>>> model.coefficients[0] == model2.coefficients[0]
True
>>> model.intercept == model2.intercept
True
- >>> from shutil import rmtree
- >>> try:
- ... rmtree(path)
- ... except OSError:
- ... pass
.. versionadded:: 1.4.0
"""
@@ -850,7 +843,17 @@ if __name__ == "__main__":
sqlContext = SQLContext(sc)
globs['sc'] = sc
globs['sqlContext'] = sqlContext
- (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
- sc.stop()
+ import tempfile
+ temp_path = tempfile.mkdtemp()
+ globs['temp_path'] = temp_path
+ try:
+ (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+ sc.stop()
+ finally:
+ from shutil import rmtree
+ try:
+ rmtree(temp_path)
+ except OSError:
+ pass
if failure_count:
exit(-1)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org