You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/08/12 14:05:02 UTC
[spark] branch master updated: [SPARK-40057][PYTHON][DOCS] Cleanup "<BLANKLINE>" in doctest
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 350a74fe8d5 [SPARK-40057][PYTHON][DOCS] Cleanup "<BLANKLINE>" in doctest
350a74fe8d5 is described below
commit 350a74fe8d5d5f0f82dac4e3123e71d896bdf09a
Author: Yikun Jiang <yi...@gmail.com>
AuthorDate: Fri Aug 12 23:04:47 2022 +0900
[SPARK-40057][PYTHON][DOCS] Cleanup "<BLANKLINE>" in doctest
### What changes were proposed in this pull request?
Cleanup `<BLANKLINE>` in doctest
### Why are the changes needed?
See https://github.com/apache/spark/pull/37465#discussion_r943071168; we'd better clean up all `<BLANKLINE>` occurrences in doctests to make the doctest code clearer.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI passed
Closes #37492 from Yikun/blankline.
Authored-by: Yikun Jiang <yi...@gmail.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/ml/stat.py | 8 +++-----
python/pyspark/mllib/tree.py | 9 +++------
python/pyspark/pandas/frame.py | 1 -
python/pyspark/pandas/series.py | 38 +++++++++++++++++---------------------
python/pyspark/sql/dataframe.py | 1 -
5 files changed, 23 insertions(+), 34 deletions(-)
diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py
index b91ef1b6cb3..704d2dc9baa 100644
--- a/python/pyspark/ml/stat.py
+++ b/python/pyspark/ml/stat.py
@@ -274,28 +274,24 @@ class Summarizer:
+-----------------------------------+
|{[1.0,1.0,1.0], 1} |
+-----------------------------------+
- <BLANKLINE>
>>> df.select(summarizer.summary(df.features)).show(truncate=False)
+--------------------------------+
|aggregate_metrics(features, 1.0)|
+--------------------------------+
|{[1.0,1.5,2.0], 2} |
+--------------------------------+
- <BLANKLINE>
>>> df.select(Summarizer.mean(df.features, df.weight)).show(truncate=False)
+--------------+
|mean(features)|
+--------------+
|[1.0,1.0,1.0] |
+--------------+
- <BLANKLINE>
>>> df.select(Summarizer.mean(df.features)).show(truncate=False)
+--------------+
|mean(features)|
+--------------+
|[1.0,1.5,2.0] |
+--------------+
- <BLANKLINE>
"""
@staticmethod
@@ -519,7 +515,9 @@ if __name__ == "__main__":
globs["sc"] = sc
globs["spark"] = spark
- failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+ failure_count, test_count = doctest.testmod(
+ globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
+ )
spark.stop()
if failure_count:
sys.exit(-1)
diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py
index e1d87e99c8a..8a5c25d96a7 100644
--- a/python/pyspark/mllib/tree.py
+++ b/python/pyspark/mllib/tree.py
@@ -273,7 +273,6 @@ class DecisionTree:
Predict: 0.0
Else (feature 0 > 0.5)
Predict: 1.0
- <BLANKLINE>
>>> model.predict(array([1.0]))
1.0
>>> model.predict(array([0.0]))
@@ -511,10 +510,8 @@ class RandomForest:
7
>>> print(model)
TreeEnsembleModel classifier with 3 trees
- <BLANKLINE>
>>> print(model.toDebugString())
TreeEnsembleModel classifier with 3 trees
- <BLANKLINE>
Tree 0:
Predict: 1.0
Tree 1:
@@ -527,7 +524,6 @@ class RandomForest:
Predict: 0.0
Else (feature 0 > 1.5)
Predict: 1.0
- <BLANKLINE>
>>> model.predict([2.0])
1.0
>>> model.predict([0.0])
@@ -764,7 +760,6 @@ class GradientBoostedTrees:
30
>>> print(model) # it already has newline
TreeEnsembleModel classifier with 10 trees
- <BLANKLINE>
>>> model.predict([2.0])
1.0
>>> model.predict([0.0])
@@ -881,7 +876,9 @@ def _test() -> None:
spark = SparkSession.builder.master("local[4]").appName("mllib.tree tests").getOrCreate()
globs["sc"] = spark.sparkContext
- (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+ (failure_count, test_count) = doctest.testmod(
+ globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
+ )
spark.stop()
if failure_count:
sys.exit(-1)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 465541abdaa..b3ded9885fc 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -2217,7 +2217,6 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
Donatello & purple & bo staff \\
\bottomrule
\end{tabular}
- <BLANKLINE>
"""
args = locals()
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index ff4c7fcc8f1..62eaa3eb1ca 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -2775,11 +2775,10 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
Examples
--------
>>> psser = ps.Series([2, 1, 3, 3], name='A')
- >>> psser.unique().sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
- <BLANKLINE>
- ... 1
- ... 2
- ... 3
+ >>> psser.unique().sort_values()
+ 1 1
+ 0 2
+ 2 3
Name: A, dtype: int64
>>> ps.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
@@ -2787,11 +2786,10 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
dtype: datetime64[ns]
>>> psser.name = ('x', 'a')
- >>> psser.unique().sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
- <BLANKLINE>
- ... 1
- ... 2
- ... 3
+ >>> psser.unique().sort_values()
+ 1 1
+ 0 2
+ 2 3
Name: (x, a), dtype: int64
"""
sdf = self._internal.spark_frame.select(self.spark.column).distinct()
@@ -4718,21 +4716,19 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
13 NaN
dtype: float64
- >>> s.mode().sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
- <BLANKLINE>
- ... 1.0
- ... 2.0
- ... 3.0
+ >>> s.mode().sort_values()
+ 0 1.0
+ 1 2.0
+ 2 3.0
dtype: float64
With 'dropna' set to 'False', we can also see NaN in the result
- >>> s.mode(False).sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
- <BLANKLINE>
- ... 1.0
- ... 2.0
- ... 3.0
- ... NaN
+ >>> s.mode(False).sort_values()
+ 0 1.0
+ 1 2.0
+ 2 3.0
+ 3 NaN
dtype: float64
"""
ser_count = self.value_counts(dropna=dropna, sort=False)
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 8ab3ed35578..565d3304596 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -387,7 +387,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
root
|-- age: integer (nullable = true)
|-- name: string (nullable = true)
- <BLANKLINE>
"""
print(self._jdf.schema().treeString())
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org