You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by da...@apache.org on 2015/06/26 17:12:45 UTC

spark git commit: [SPARK-8652] [PYSPARK] Check return value for all uses of doctest.testmod()

Repository: spark
Updated Branches:
  refs/heads/master 37bf76a2d -> 41afa1650


[SPARK-8652] [PYSPARK] Check return value for all uses of doctest.testmod()

This patch addresses a critical issue in the PySpark tests:

Several of our Python modules' `if __name__ == "__main__":` blocks call `doctest.testmod()` to run doctests but never check its return value. As a result, some PySpark doctest failures can go unnoticed because they do not fail the build.

Fortunately, only one test failure was being masked by this bug: a `pyspark.profiler` doctest was failing due to changes in RDD pipelining.

Author: Josh Rosen <jo...@databricks.com>

Closes #7032 from JoshRosen/testmod-fix and squashes the following commits:

60dbdc0 [Josh Rosen] Account for int vs. long formatting change in Python 3
8b8d80a [Josh Rosen] Fix failing test.
e6423f9 [Josh Rosen] Check return code for all uses of doctest.testmod().


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41afa165
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41afa165
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41afa165

Branch: refs/heads/master
Commit: 41afa16500e682475eaa80e31c0434b7ab66abcb
Parents: 37bf76a
Author: Josh Rosen <jo...@databricks.com>
Authored: Fri Jun 26 08:12:22 2015 -0700
Committer: Davies Liu <da...@databricks.com>
Committed: Fri Jun 26 08:12:22 2015 -0700

----------------------------------------------------------------------
 dev/merge_spark_pr.py            | 4 +++-
 python/pyspark/accumulators.py   | 4 +++-
 python/pyspark/broadcast.py      | 4 +++-
 python/pyspark/heapq3.py         | 5 +++--
 python/pyspark/profiler.py       | 8 ++++++--
 python/pyspark/serializers.py    | 8 +++++---
 python/pyspark/shuffle.py        | 4 +++-
 python/pyspark/streaming/util.py | 4 +++-
 8 files changed, 29 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/dev/merge_spark_pr.py
----------------------------------------------------------------------
diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index cd83b35..cf827ce 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -431,6 +431,8 @@ def main():
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)
     
     main()

http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/accumulators.py
----------------------------------------------------------------------
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index adca90d..6ef8cf5 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -264,4 +264,6 @@ def _start_update_server():
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)

http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/broadcast.py
----------------------------------------------------------------------
diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py
index 3de4615..663c9ab 100644
--- a/python/pyspark/broadcast.py
+++ b/python/pyspark/broadcast.py
@@ -115,4 +115,6 @@ class Broadcast(object):
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)

http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/heapq3.py
----------------------------------------------------------------------
diff --git a/python/pyspark/heapq3.py b/python/pyspark/heapq3.py
index 4ef2afe..b27e91a 100644
--- a/python/pyspark/heapq3.py
+++ b/python/pyspark/heapq3.py
@@ -883,6 +883,7 @@ except ImportError:
 
 
 if __name__ == "__main__":
-
     import doctest
-    print(doctest.testmod())
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)

http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/profiler.py
----------------------------------------------------------------------
diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py
index d18daaa..44d17bd 100644
--- a/python/pyspark/profiler.py
+++ b/python/pyspark/profiler.py
@@ -90,9 +90,11 @@ class Profiler(object):
     >>> sc = SparkContext('local', 'test', conf=conf, profiler_cls=MyCustomProfiler)
     >>> sc.parallelize(range(1000)).map(lambda x: 2 * x).take(10)
     [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
+    >>> sc.parallelize(range(1000)).count()
+    1000
     >>> sc.show_profiles()
     My custom profiles for RDD:1
-    My custom profiles for RDD:2
+    My custom profiles for RDD:3
     >>> sc.stop()
     """
 
@@ -169,4 +171,6 @@ class BasicProfiler(Profiler):
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)

http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/serializers.py
----------------------------------------------------------------------
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 7f9d0a3..411b4db 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -44,8 +44,8 @@ which contains two batches of two objects:
 
 >>> rdd.glom().collect()
 [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]
->>> rdd._jrdd.count()
-8L
+>>> int(rdd._jrdd.count())
+8
 >>> sc.stop()
 """
 
@@ -556,4 +556,6 @@ def write_with_length(obj, stream):
 
 if __name__ == '__main__':
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)

http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/shuffle.py
----------------------------------------------------------------------
diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py
index 67752c0..8fb71ba 100644
--- a/python/pyspark/shuffle.py
+++ b/python/pyspark/shuffle.py
@@ -838,4 +838,6 @@ class ExternalGroupBy(ExternalMerger):
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)

http://git-wip-us.apache.org/repos/asf/spark/blob/41afa165/python/pyspark/streaming/util.py
----------------------------------------------------------------------
diff --git a/python/pyspark/streaming/util.py b/python/pyspark/streaming/util.py
index 34291f3..a9bfec2 100644
--- a/python/pyspark/streaming/util.py
+++ b/python/pyspark/streaming/util.py
@@ -125,4 +125,6 @@ def rddToFileName(prefix, suffix, timestamp):
 
 if __name__ == "__main__":
     import doctest
-    doctest.testmod()
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        exit(-1)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org