You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/08/30 07:25:43 UTC
[spark] branch master updated: [SPARK-40270][PS] Make 'compute.max_rows' as None working in DataFrame.style

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0f0e8cc26b6 [SPARK-40270][PS] Make 'compute.max_rows' as None working in DataFrame.style
0f0e8cc26b6 is described below

commit 0f0e8cc26b6c80cc179368e3009d4d6c88103a64
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Tue Aug 30 16:25:26 2022 +0900

    [SPARK-40270][PS] Make 'compute.max_rows' as None working in DataFrame.style
    
    ### What changes were proposed in this pull request?
    
    This PR makes the `compute.max_rows` option work as expected in `DataFrame.style` when it is set to `None`: instead of throwing an exception, it collects all of the data into a pandas DataFrame.
    
    ### Why are the changes needed?
    
    To make the configuration work as expected.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes.
    
    ```python
    import pyspark.pandas as ps
    ps.set_option("compute.max_rows", None)
    ps.get_option("compute.max_rows")
    ps.range(1).style
    ```
    
    **Before:**
    
    ```
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "/.../spark/python/pyspark/pandas/frame.py", line 3656, in style
        pdf = self.head(max_results + 1)._to_internal_pandas()
    TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
    ```
    
    **After:**
    
    ```
    <pandas.io.formats.style.Styler object at 0x7fdf78250430>
    ```
    
    ### How was this patch tested?
    
    Manually tested, and a unit test was added.
    
    Closes #37718 from HyukjinKwon/SPARK-40270.
    
    Authored-by: Hyukjin Kwon <gu...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/pandas/frame.py                | 16 +++++++++-------
 python/pyspark/pandas/tests/test_dataframe.py | 16 +++++++++++-----
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index ba5df94c86c..8fc425e88a3 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -3754,19 +3754,21 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
         Property returning a Styler object containing methods for
         building a styled HTML representation for the DataFrame.
 
-        .. note:: currently it collects top 1000 rows and return its
-            pandas `pandas.io.formats.style.Styler` instance.
-
         Examples
         --------
         >>> ps.range(1001).style  # doctest: +SKIP
         <pandas.io.formats.style.Styler object at ...>
         """
         max_results = get_option("compute.max_rows")
-        pdf = self.head(max_results + 1)._to_internal_pandas()
-        if len(pdf) > max_results:
-            warnings.warn("'style' property will only use top %s rows." % max_results, UserWarning)
-        return pdf.head(max_results).style
+        if max_results is not None:
+            pdf = self.head(max_results + 1)._to_internal_pandas()
+            if len(pdf) > max_results:
+                warnings.warn(
+                    "'style' property will only use top %s rows." % max_results, UserWarning
+                )
+            return pdf.head(max_results).style
+        else:
+            return self._to_internal_pandas().style
 
     def set_index(
         self,
diff --git a/python/pyspark/pandas/tests/test_dataframe.py b/python/pyspark/pandas/tests/test_dataframe.py
index 2ab908fed00..34480152f8c 100644
--- a/python/pyspark/pandas/tests/test_dataframe.py
+++ b/python/pyspark/pandas/tests/test_dataframe.py
@@ -6904,12 +6904,18 @@ class DataFrameTest(ComparisonTestBase, SQLTestUtils):
         def style_negative(v, props=""):
             return props if v < 0 else None
 
-        # If the value is negative, the text color will be displayed as red.
-        pdf_style = pdf.style.applymap(style_negative, props="color:red;")
-        psdf_style = psdf.style.applymap(style_negative, props="color:red;")
+        def check_style():
+            # If the value is negative, the text color will be displayed as red.
+            pdf_style = pdf.style.applymap(style_negative, props="color:red;")
+            psdf_style = psdf.style.applymap(style_negative, props="color:red;")
 
-        # Test whether the same shape as pandas table is created including the color.
-        self.assert_eq(pdf_style.to_latex(), psdf_style.to_latex())
+            # Test whether the same shape as pandas table is created including the color.
+            self.assert_eq(pdf_style.to_latex(), psdf_style.to_latex())
+
+        check_style()
+
+        with ps.option_context("compute.max_rows", None):
+            check_style()
 
 
 if __name__ == "__main__":


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org