You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/08/30 07:25:43 UTC
[spark] branch master updated: [SPARK-40270][PS] Make 'compute.max_rows' as None working in DataFrame.style
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0f0e8cc26b6 [SPARK-40270][PS] Make 'compute.max_rows' as None working in DataFrame.style
0f0e8cc26b6 is described below
commit 0f0e8cc26b6c80cc179368e3009d4d6c88103a64
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Tue Aug 30 16:25:26 2022 +0900
[SPARK-40270][PS] Make 'compute.max_rows' as None working in DataFrame.style
### What changes were proposed in this pull request?
This PR makes `DataFrame.style` work as expected when the `compute.max_rows` option is set to `None`: instead of throwing an exception, it collects all rows into a pandas DataFrame.
### Why are the changes needed?
To make the configuration work as expected.
### Does this PR introduce _any_ user-facing change?
Yes.
```python
import pyspark.pandas as ps
ps.set_option("compute.max_rows", None)
ps.get_option("compute.max_rows")
ps.range(1).style
```
**Before:**
```
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/.../spark/python/pyspark/pandas/frame.py", line 3656, in style
pdf = self.head(max_results + 1)._to_internal_pandas()
TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
```
**After:**
```
<pandas.io.formats.style.Styler object at 0x7fdf78250430>
```
### How was this patch tested?
Manually tested, and a unit test was added.
Closes #37718 from HyukjinKwon/SPARK-40270.
Authored-by: Hyukjin Kwon <gu...@apache.org>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/pandas/frame.py | 16 +++++++++-------
python/pyspark/pandas/tests/test_dataframe.py | 16 +++++++++++-----
2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index ba5df94c86c..8fc425e88a3 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -3754,19 +3754,21 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
Property returning a Styler object containing methods for
building a styled HTML representation for the DataFrame.
- .. note:: currently it collects top 1000 rows and return its
- pandas `pandas.io.formats.style.Styler` instance.
-
Examples
--------
>>> ps.range(1001).style # doctest: +SKIP
<pandas.io.formats.style.Styler object at ...>
"""
max_results = get_option("compute.max_rows")
- pdf = self.head(max_results + 1)._to_internal_pandas()
- if len(pdf) > max_results:
- warnings.warn("'style' property will only use top %s rows." % max_results, UserWarning)
- return pdf.head(max_results).style
+ if max_results is not None:
+ pdf = self.head(max_results + 1)._to_internal_pandas()
+ if len(pdf) > max_results:
+ warnings.warn(
+ "'style' property will only use top %s rows." % max_results, UserWarning
+ )
+ return pdf.head(max_results).style
+ else:
+ return self._to_internal_pandas().style
def set_index(
self,
diff --git a/python/pyspark/pandas/tests/test_dataframe.py b/python/pyspark/pandas/tests/test_dataframe.py
index 2ab908fed00..34480152f8c 100644
--- a/python/pyspark/pandas/tests/test_dataframe.py
+++ b/python/pyspark/pandas/tests/test_dataframe.py
@@ -6904,12 +6904,18 @@ class DataFrameTest(ComparisonTestBase, SQLTestUtils):
def style_negative(v, props=""):
return props if v < 0 else None
- # If the value is negative, the text color will be displayed as red.
- pdf_style = pdf.style.applymap(style_negative, props="color:red;")
- psdf_style = psdf.style.applymap(style_negative, props="color:red;")
+ def check_style():
+ # If the value is negative, the text color will be displayed as red.
+ pdf_style = pdf.style.applymap(style_negative, props="color:red;")
+ psdf_style = psdf.style.applymap(style_negative, props="color:red;")
- # Test whether the same shape as pandas table is created including the color.
- self.assert_eq(pdf_style.to_latex(), psdf_style.to_latex())
+ # Test whether the same shape as pandas table is created including the color.
+ self.assert_eq(pdf_style.to_latex(), psdf_style.to_latex())
+
+ check_style()
+
+ with ps.option_context("compute.max_rows", None):
+ check_style()
if __name__ == "__main__":
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org